# coding:utf-8

import jieba
from jieba import analyse

# Example result format: [{'中国': 20}, {'人民': 19}, {'很行': 10}]
from sentiment import stopwords


# Returns the top-N words paired with their counts, e.g. [{'word': 10}, ...]
def word_freq(txt: str, max: int) -> list:
    """
    Return the most frequent words in *txt* as a list of single-pair dicts.

    The text is segmented with jieba; stopwords and single-character
    tokens are discarded before counting.

    :param txt: raw text to analyse
    :param max: number of top words to return (name shadows the builtin
                ``max``; kept unchanged for backward compatibility)
    :return: list like ``[{'中国': 20}, {'人民': 19}, ...]``, sorted by
             frequency, highest first
    """
    from collections import Counter  # stdlib; local import keeps module imports untouched

    counts = Counter(
        word
        for word in jieba.lcut(txt)
        if word not in stopwords and len(word) != 1
    )
    # most_common sorts by count descending and, like the original stable
    # sort, preserves first-seen order for ties.
    return [{word: freq} for word, freq in counts.most_common(max)]

# txt = open('6.txt', encoding='utf-8').read()
# result = word_freq(txt, 10)
# print(result)

if __name__ == '__main__':
    # Use a context manager so the file handle is closed deterministically
    # (the original open() call leaked the handle).
    with open('data/6.txt', encoding='utf-8') as f:
        txt = f.read()
    result = word_freq(txt, 10)
    print(result)